Input matrix:
Decomposition matrices:
Definition:
\begin{equation} X \approx F G \end{equation}Definition:
\begin{equation} X \approx F G = X W G \end{equation}where the columns of $F = X W$ are convex combinations of the features of $X$.
In [27]:
import pymf
import sys
import msaf
from msaf import utils as U
from msaf import jams2
import msaf.algorithms.cnmf3.segmenter as S
import msaf.algorithms.foote.segmenter as Foote
fig_dir = "/Users/uri/Dropbox/NYU/Dissertation/manuscript/4/figures/"
In [28]:
def cnmf(X, r, niter, fig=False, aspect="auto"):
nmf_mdl = pymf.CNMF(X, num_bases=r)
nmf_mdl.factorize(niter=niter)
F = np.asarray(nmf_mdl.W)
G = np.asarray(nmf_mdl.H)
W = np.asarray(nmf_mdl.G)
if fig:
plt.figure(figsize=(10,4))
plt.gcf().suptitle("C-NMF")
plt.subplot(1,3,1); plt.imshow(F, aspect=aspect, interpolation="nearest"); plt.title("F");
plt.subplot(1,3,2); plt.imshow(G, aspect=aspect, interpolation="nearest"); plt.title("G");
plt.subplot(1,3,3); plt.imshow(F * G, aspect=aspect, interpolation="nearest"); plt.title("X = F * G");
plt.show()
plt.figure(figsize=(10,5))
plt.subplot(1,4,1); plt.imshow(X, aspect=aspect, interpolation="nearest"); plt.title("X");
plt.subplot(1,4,2); plt.imshow(W, aspect=aspect, interpolation="nearest"); plt.title("W");
plt.subplot(1,4,3); plt.imshow(np.asmatrix(X) * W, aspect=aspect, interpolation="nearest"); plt.title("F = X * W");
plt.show()
return F, G, W
def nmf(X, r, niter, fig=False, aspect="auto"):
nmf_mdl = pymf.NMF(X, num_bases=r)
nmf_mdl.factorize(niter=niter)
F = np.asarray(nmf_mdl.W)
G = np.asarray(nmf_mdl.H)
if fig:
plt.figure(figsize=(10,4))
plt.gcf().suptitle("NMF")
plt.subplot(1,3,1); plt.imshow(F, aspect=aspect, interpolation="nearest"); plt.title("F");
plt.subplot(1,3,2); plt.imshow(G, aspect=aspect, interpolation="nearest"); plt.title("G");
plt.subplot(1,3,3); plt.imshow(F * G, aspect=aspect, interpolation="nearest"); plt.title("X = F * G");
return F, G
def read_ssm(file_path):
m = 13
dist = "correlation"
chroma, mfcc, tonnetz, beats, dur, anal = msaf.io.get_features(file_path, annot_beats=False)
F = U.lognormalize_chroma(chroma)
F = S.median_filter(F, M=m)
X = Foote.compute_ssm(F, metric=dist)
return X
def read_chroma(file_path, m=13):
dist = "correlation"
chroma, mfcc, tonnetz, beats, dur, anal = msaf.io.get_features(file_path, annot_beats=False)
F = U.lognormalize_chroma(chroma)
# F = F ** 2
F = S.median_filter(F, M=m)
return F
In [29]:
from scipy.cluster.vq import whiten, vq, kmeans
def get_kmeans_bounds(X, k, niter):
codebook, dist = kmeans(X, k, iter=niter)
codes, disto = vq(X, codebook)
# Find points where there's a cluster difference (boundary)
b = np.where(np.diff(codes) != 0)[0] + 1
return np.asarray(b)
In [26]:
N = 10
p = 6
r = 4
niter = 100
fig = True
aspect = 'auto'
np.random.seed(1)
X = np.random.random((N,p))
X = np.ones((N,p))
X[:4,:] = np.arange(p)
X[4:7,:] = np.arange(0,p*2,2)
X[7:,:] = np.arange(p)
# X[7:,:] = np.arange(0,p*3,3)
X = X.T
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Come_Together.mp3")
X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_06_-_Tell_Me_Why.mp3")
# Factorize
F, G, W = cnmf(X, r, niter, fig=fig, aspect=aspect)
F, G = nmf(X, r, niter, fig=fig, aspect=aspect)
# Find boundaries using k-means
bounds = get_kmeans_bounds(X, r, niter)
print bounds
In [4]:
# Experiment against consistency
X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3")
rank = 2
niter = 70
N = 100
def distNMF( x1, x2, y1, y2 ):
min1 = distMat(x1, y1)
min2 = distMat(x1, y2)
if min1 < min2:
m1 = min1
m2 = distMat(x2, y2)
else:
m1 = min2
m2 = distMat(x2, y1)
return m1 + m2
def distMat( X, Y ):
#return distance.euclidean(X,Y)
return np.linalg.norm( X - Y )
CNMF = []
NMF = []
for i in range(N):
cnmf = pymf.CNMF(X, num_bases=rank)
cnmf.factorize(niter=niter)
Wcnmf = np.asmatrix(cnmf.W)
Hcnmf = np.asmatrix(cnmf.H)
nmf = pymf.NMF(X, num_bases=rank)
nmf.factorize(niter=niter)
Wnmf = np.asmatrix(nmf.W)
Hnmf = np.asmatrix(nmf.H)
for r in range(rank):
CNMF.append(Wcnmf[:,r]*Hcnmf[r,:])
NMF.append(Wnmf[:,r]*Hnmf[r,:])
# Build distance matrix
RC = np.zeros((N,N))
RN = np.zeros((N,N))
M = N*N/2 - N/2
RCC = np.zeros((M))
RNN = np.zeros((M))
k = 0
for i in range(N):
for j in range(i):
RC[i,j] = distNMF( CNMF[2*i], CNMF[2*i+1], CNMF[2*j], CNMF[2*j+1] )
RN[i,j] = distNMF( NMF[2*i], NMF[2*i+1], NMF[2*j], NMF[2*j+1] )
RCC[k] = RC[i,j]
RNN[k] = RN[i,j]
k += 1
# fig = 10
# fig = plt.figure(fig)
# plt.clf()
# ax = fig.add_subplot(2, 1, 1)
# ax.imshow( RC, interpolation='nearest' )
# ax.set_title("C-NMF")
# ax = fig.add_subplot(2, 1, 2)
# ax.imshow( RN, interpolation='nearest' )
# ax.set_title("NMF")
histRNN = np.bincount(RNN.astype(int)) + 1
histRCC = np.ones(histRNN.shape)
h = np.bincount(RCC.astype(int)) + 1
histRCC[:h.shape[0]] = h
#print "histRNN", histRNN
#print "histRCC", histRCC
fig = 11
fig = plt.figure(fig, figsize=(5, 3))
plt.clf()
ax = fig.add_subplot(1, 1, 1)
#ax.plot( RCC )
ax.plot( histRCC, label='C-NMF' )
#ax.plot( np.sort(RCC)[::-1], label='C-NMF' )
#ax.plot( RNN )
ax.plot( histRNN, label='NMF' )
#ax.plot( np.sort(RNN)[::-1], label='NMF' )
ax.set_title("Consistency of C-NMF vs NMF")
ax.set_xlabel("Euclidean distance")
ax.set_ylabel("Count")
ax.set_yscale('log')
ax.legend()
plt.savefig('consistency.pdf', bbox_inches='tight')
plt.show()
In [241]:
from matplotlib import gridspec
X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_01_-_I_Saw_Her_Standing_There.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_CD1_-_11_-_Black_Bird.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_05_-_Dig_It.mp3")
# X = read_ssm("/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3")
# X = read_chroma("/Users/uri/datasets/Segments/audio/Isophonics_01_-_Help!.mp3", m=9).T
X = read_chroma("/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3", m=9).T
# X = read_chroma("/Users/uri/datasets/Segments/audio/Isophonics_CD1_-_07_-_While_My_Guitar_Gently_Weeps.mp3", m=9).T
r = 4
niter = 300
fig = False
cmap = 'hot'
cF, cG, cW = cnmf(X, r, niter, fig=fig)
nF, nG = nmf(X, r, niter, fig=fig)
# plt.figure(figsize=(6,6))
# for rr in xrange(r):
# cD = cF[:,rr] * cG[rr, :]
# nD = nF[:,rr] * nG[rr, :]
# print rr
# plt.subplot(2,2,rr*2+1); plt.imshow(cD, interpolation="nearest")
# plt.title("C-NMF")
# plt.subplot(2,2,rr*2+2); plt.imshow(nD, interpolation="nearest")
# plt.title("NMF")
# plt.show()
plt.figure(figsize=(14,4))
ax1 = plt.subplot2grid((5,14), (1,0), colspan=3, rowspan=3)
ax2 = plt.subplot2grid((5,14), (0,3), colspan=1, rowspan=5)
ax3 = plt.subplot2grid((5,14), (2,4), colspan=3, rowspan=1)
ax4 = plt.subplot2grid((5,14), (0,7), colspan=1, rowspan=5)
ax5 = plt.subplot2grid((5,14), (2,8), colspan=3, rowspan=1)
ax6 = plt.subplot2grid((5,14), (1,11), colspan=3, rowspan=3)
ax1.imshow(X, interpolation="nearest", aspect="auto", cmap=cmap)
ax1.set_title("X")
ax2.imshow(cW, interpolation="nearest", aspect="auto", cmap=cmap)
ax2.set_title("W")
ax2.set_xticks(xrange(r))
ax2.set_xticklabels(np.arange(r)+1)
ax2.text(-3.5, cW.shape[0]/2., r'$\times$', fontsize=15)
ax2.text(r - .25, cW.shape[0]/2., r'$\times$', fontsize=15)
ax3.imshow(cG, interpolation="nearest", aspect="auto", cmap=cmap)
ax3.set_yticks(xrange(r))
ax3.set_yticklabels(np.arange(r)+1)
ax3.set_title("G")
ax4.text(-3.5, cF.shape[0]/2. - 0.5, r'$=$', fontsize=15)
ax4.imshow(cF, interpolation="nearest", aspect="auto", cmap=cmap)
ax4.text(r - .25, cF.shape[0]/2. - 0.5, r'$\times$', fontsize=15)
ax4.set_title("F")
ax4.set_xticks(xrange(r))
ax4.set_xticklabels(np.arange(r)+1)
ax5.imshow(cG, interpolation="nearest", aspect="auto", cmap=cmap)
ax5.set_title("G")
ax5.set_yticks(xrange(r))
ax5.set_yticklabels(np.arange(r)+1)
ax6.imshow(cF * cG, interpolation="nearest", aspect="auto", cmap=cmap)
ax6.text(-25, nF.shape[0]/2., r'$=$', fontsize=15)
ax6.set_title("X'")
plt.tight_layout()
plt.savefig('cnmf-example.pdf', bbox_inches='tight')
plt.show()
plt.figure(figsize=(7, 4))
ax1 = plt.subplot2grid((5,7), (0,0), colspan=1, rowspan=5)
ax2 = plt.subplot2grid((5,7), (2,1), colspan=3, rowspan=1)
ax3 = plt.subplot2grid((5,7), (1,4), colspan=3, rowspan=3)
ax1.imshow(nF, interpolation="nearest", aspect="auto", cmap=cmap)
ax1.set_title("F")
ax1.set_xticks(xrange(r))
ax1.set_xticklabels(np.arange(r)+1)
ax1.text(r - 0.5, nF.shape[0]/2., r'$\times$', fontsize=15)
ax2.imshow(nG, interpolation="nearest", aspect="auto", cmap=cmap)
ax2.set_title("G")
ax2.set_yticks(xrange(r))
ax2.set_yticklabels(np.arange(r)+1)
ax3.imshow(nF * nG, interpolation="nearest", aspect="auto", cmap=cmap)
ax3.text(-20, nF.shape[0]/2., r'$=$', fontsize=15)
ax3.set_title("X'")
plt.tight_layout()
plt.savefig('nmf-example.pdf', bbox_inches='tight')
plt.show()
In [34]:
# Example of extraction of boundaries using C-NMF
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_01_-_Come_Together.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_03_-_Lucy_In_The_Sky_With_Diamonds.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_CD1_-_10_-_I'm_So_Tired.mp3"
# in_path = "/Users/uri/datasets/Segments/audio/Isophonics_06_-_She's_Leaving_Home.mp3"
in_path = "/Users/uri/datasets/Segments/audio/Isophonics_08_-_Strawberry_Fields_Forever.mp3"
jam_file = in_path.replace(msaf.Dataset.audio_dir, msaf.Dataset.references_dir)
jam_file = jam_file.replace("mp3", "jams")
chroma, mfcc, tonnetz, beats, dur, anal = msaf.io.get_features(in_path, annot_beats=False)
h = 14
R = 10
X = read_chroma(in_path, m=h).T
ann_bounds = msaf.io.read_ref_bound_frames(in_path, beats)
ann_inter, ann_labels = jams2.converters.load_jams_range(jam_file,
"sections", annotator=0, context=msaf.prefix_dict["Isophonics"])
print "Unique segments:", len(np.unique(ann_labels))
print "Annotated bounds: %s" % ann_bounds
print "Annotated labels: %s" % ann_labels
# logging.info("Estimated bounds: %s" % bound_idxs)
r = 4
niter = 300
fig = False
cmap = 'hot'
F, G, W = cnmf(X, r, niter, fig=fig)
G = G.T
orG = np.copy(G)
idx = np.argmax(G, axis=1)
max_idx = np.arange(G.shape[0])
max_idx = (max_idx, idx.flatten())
G[:, :] = 0
G[max_idx] = idx + 1
# TODO: Order matters?
oG = np.copy(G)
G = np.sum(G, axis=1)
mG = np.copy(G)
G = S.median_filter(G[:, np.newaxis], R)
bound_idxs = np.where(np.diff(G.flatten()) != 0)[0] + 1
# plt.imshow(X, interpolation="nearest", aspect="auto", cmap="hot"); plt.show()
plt.figure(figsize=(10, 5))
plt.subplot(2, 2, 1)
plt.imshow(orG.T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(a) \n $G$")
plt.yticks(np.arange(r))
plt.xlabel("Time Frames")
plt.gca().set_yticklabels(np.arange(r)+1)
plt.subplot(2, 2, 2)
plt.imshow(oG.T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(b) \n $\mathcal{G}$")
plt.yticks(np.arange(r))
plt.xlabel("Time Frames")
plt.gca().set_yticklabels(np.arange(r)+1)
plt.subplot(2, 2, 3)
plt.imshow(mG[:,np.newaxis].T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(c) \n $\mathbf{g}$")
plt.yticks([])
plt.xlabel("Time Frames")
plt.subplot(2, 2, 4)
plt.imshow(G.T, interpolation="nearest", aspect="auto", cmap="hot")
plt.title("(d) \n $\mathbf{g}'$")
plt.yticks([])
plt.xlabel("Time Frames")
for ann_bound in ann_bounds[2:-2]:
plt.axvline(ann_bound, ymin=0, ymax=1, linewidth=2.0, color="g")
plt.tight_layout()
plt.savefig(fig_dir + "cnmf-boundaries-example.pdf")
plt.show()
In [47]:
# Labels
print bound_idxs
print ann_labels
def most_frequent(x):
"""Returns the most frequent value in x."""
return np.argmax(np.bincount(x))
def fill_frames(labels, bounds, N):
"""Fills up the frames with label ids."""
label_frames = np.zeros(N)
all_bounds = [0] + bounds.tolist()
for i, (start, end) in enumerate(zip(all_bounds[:-1], all_bounds[1:])):
label_frames[start:end] = labels[i]
label_frames[all_bounds[-1]:] = labels[-1]
return label_frames
ann_labels = MSAF.read_annot_int_labels(in_path)
print ann_labels
F, G, W = cnmf(X, r+1, niter, fig=fig)
G = G.T
orG = np.copy(G)
idx = np.argmax(G, axis=1)
max_idx = np.arange(G.shape[0])
max_idx = (max_idx, idx.flatten())
G[:, :] = 0
G[max_idx] = idx + 1
# TODO: Order matters?
oG = np.copy(G)
G = np.sum(G, axis=1)
mG = np.copy(G)
G = S.median_filter(G[:, np.newaxis], R-2)
N = len(G.flatten())
label_frames = G.flatten()
label_frames = np.asarray(label_frames, dtype=int)
est_labels = [label_frames[0]]
bound_inters = zip(bound_idxs[:-1], bound_idxs[1:])
for bound_inter in bound_inters:
if bound_inter[1] - bound_inter[0] <= 0:
est_labels.append(np.max(label_frames) + 1)
else:
est_labels.append(most_frequent(
label_frames[bound_inter[0]: bound_inter[1]]))
#print bound_inter, labels[-1]
est_labels.append(label_frames[-1])
label_frames = fill_frames(est_labels, bound_idxs, N)
ann_label_frames = fill_frames(ann_labels, ann_bounds[1:-1], N)
# PLOT
plt.figure(figsize=(10, 5))
plt.subplot(2, 1, 1)
pre_L = np.vstack((old_label_frames, G.T))
plt.imshow(pre_L, interpolation="nearest", aspect="auto", cmap="hot")
for bound_idx in bound_idxs:
plt.axvline(bound_idx, ymin=0, ymax=1, linewidth=2.0, color="b")
plt.yticks([0,1])
plt.gca().set_yticklabels(["$\mathbf{g}'$", "$\mathbf{g}'_{r'}$"])
plt.xlabel("Time Frames")
plt.subplot(2, 1, 2)
L = np.vstack((label_frames[:, np.newaxis].T, ann_label_frames[:, np.newaxis].T))
plt.imshow(L, interpolation="nearest", aspect="auto", cmap="hot")
plt.yticks([0,1])
plt.gca().set_yticklabels(["Estimated", "Annotated"])
plt.xlabel("Time Frames")
plt.tight_layout()
plt.savefig("cnmf-labels-example.pdf")
plt.show()
print est_labels
print ann_labels
In [40]:
track_path = "/Users/uri/datasets/Segments/audio/Isophonics_05_-_And_I_Love_Her.mp3"
Xm = read_chroma(track_path, m=9).T
X = read_chroma(track_path, m=1).T
pitches = ["A", "A#", "B", "C", "C#", "D", "D#", "E", "F" ,"F#", "G", "G#"]
plt.figure(figsize=(9,6))
plt.subplot(2, 1, 1)
plt.imshow(X, interpolation="nearest", aspect="auto", cmap="hot")
plt.yticks(np.arange(12))
plt.gca().set_yticklabels(pitches)
plt.xlabel("Time Frames")
plt.subplot(2, 1, 2)
plt.imshow(Xm, interpolation="nearest", aspect="auto", cmap="hot")
plt.yticks(np.arange(12))
plt.gca().set_yticklabels(pitches)
plt.xlabel("Time Frames")
plt.tight_layout()
plt.savefig("chromagram-example.pdf")
plt.show()